# Baseline function to run a single iteration of the simulation
# Variables:
# N = sample size (default to 10000)

# never_takers = allows for P(Compliance) = 0 (otherwise min. prob set to 0.0001)

# blocking_variable = a vector that contains string(s) denoting how we should be blocking
# - default set to: c("compliance")
# - Potential string inputs: "covariates", "noise", "compliance" (Can take in numerous inputs for this)

# rho: correlation between X1 and X2, as well as X2 and X3 (default set to 0.5)

# compliance_rate = 0.1

# beta = parameter that provides the magnitude for ER violation
run_iter<-function(i, N = 10000, never_takers = TRUE, blocking_variables = "compliance",
                   rho = 0.5, compliance_rate = 0.1, beta=0, gamma=0, discrete=FALSE){
  #------------------------------------------------------------------------------------------------
  #Set up data:
  #------------------------------------------------------------------------------------------------
  X1 = rnorm(N, 0, 1)
  if(discrete==TRUE){
    X1 = as.numeric(gtools::quantcut(X1))
  }
  ## low compliance rate version
  X0 = rnorm(N, 0, 1)
  X2 = rho*X1 + sqrt(1 - rho^2)*X0

  lp = (X2 - mean(X2))/sd(X2)*0.25
  comp_prob = pnorm(lp + qnorm(compliance_rate))

  X3 = 0.5*X2 + sqrt(1 - 0.5^2)*rnorm(N, 0, 1)

  #Noise:
  X4 = rnorm(N, 0, 1)
  ## generate potential covariates
  Y0 = X3 + 15 * X1 + rnorm(N, 0, 0.25)
  Y1 = Y0 + 2 + 2 * X3 + 10 * X1 + rnorm(N, 0, 0.1)

  if(gamma != 0){
    if(blocking_variables=="compliance"){
      X2 = gamma*X2 + sqrt(1-gamma^2)*rnorm(N, 0, 1)
    }
    if(blocking_variables=="covariates"){
      X1 = gamma*X1 + sqrt(1-gamma^2)*rnorm(N, 0, 1)
    }
  }
  ## 1% complier rate
  ## draw complier status
  complier = rbinom(N, size = 1, prob = comp_prob)

  # Calculate true ATE
  ATE = mean(Y1 - Y0)

  # Calculate compliance average treatment effect (CACE)
  CACE = mean(Y1[complier == 1]+beta) - mean(Y0[complier == 1])

  # Set up blocking variables:
  if (all(!blocking_variables %in% c("covariates", "compliance", "noise"))) {
    print("Invalid input for blocking variables")
    return()
  }

  blocking_variables[which(blocking_variables == "covariates")] = "X1"
  blocking_variables[which(blocking_variables == "compliance")] = "X2"
  blocking_variables[which(blocking_variables == "noise")] = "X4"

  X_block = data.frame(X1 = X1, X2 = X2, X4 = X4, score = comp_prob)
  my_distances <- distances(data.frame(X_block, score = comp_prob),
                            dist_variables = blocking_variables)
  #------------------------------------------------------------------------------------------------
  #BLOCKING BASED ESTIMATORS
  #------------------------------------------------------------------------------------------------
  # Make blocking with at least 2 units in each block
  my_blocking <- quickblock(my_distances, size_constraint = 2L)

  if(length(unique(my_blocking)) != round(N/2)){
    my_treatments <- tibble(1:N, my_blocking) %>%
      group_by(my_blocking) %>%
      mutate(my_treatments = sample(rep(c(1, 0), c(ceiling(n()/2), n() - ceiling(n()/2)))))
    my_treatments<-my_treatments$my_treatments
  }else{
    my_treatments <- tibble(1:N, my_blocking) %>%
      group_by(my_blocking) %>%
      mutate(my_treatments = sample(rep(c(1, 0), c(1, n() - 1))))
    my_treatments <- my_treatments$my_treatments
  }

  T = my_treatments

  Y = ifelse(T == 1 & complier == 1, Y1, Y0)

  # Exclusion Restriction Violation
  Y = Y + (T==1)*beta

  treatment_received = ifelse(T==1 & complier==1, 1, 0)
  #------------------------------------------------------------------------------------------------
  #--Block DiM
  total_compliers = sum(complier == 1 & T==1)

  df_temp =tibble(Y, T, complier, my_blocking) %>%
    group_by(my_blocking, .group='drop' ) %>%
    summarize(
      block_weight = sum(complier[T==1])/total_compliers,
      Y1_sum = mean(Y[T==1 & complier == 1]),
      Y0_sum = mean(Y[T==0]),
      tau.b = mean(Y[T==1 & complier == 1]) - mean(Y[T==0])) %>% ungroup()

  df_temp = na.omit(df_temp)
  block_dim = sum(df_temp$block_weight*df_temp$tau.b)
  block_dim_se = sqrt(1/(total_compliers-1) * sum(df_temp$block_weight*(df_temp$tau.b - block_dim)^2))
  #------------------------------------------------------------------------------------------------
  #--Blocked IV:
  iv_block_model = iv_robust(Y~treatment_received | T,
                             fixed_effects = ~my_blocking,
                             try_cholesky = TRUE, se_type = "HC1")
  iv_block = as.numeric(coef(iv_block_model))
  iv_block_se = iv_block_model$std.error
  #------------------------------------------------------------------------------------------------
  #--Blocked PSW:
  df_X = X_block[,which(names(X_block) %in% blocking_variables)]
  #Need to estimate the weights:
  df_temp = data.frame(complier, df_X)
  weights_model = glm(complier~., data = df_temp[T==1,], family="binomial")
  weights = predict(weights_model, df_temp, 'response')

  w = ifelse(T==0, weights, complier)
  psw = lm_lin(Y~T, covariates = ~my_blocking, weights=w, try_cholesky=TRUE)
  psw_block = coef(psw)[2]
  psw_block_se = psw$std.error[2]

  #Placebo
  model_placebo = lm_robust(Y[complier == 1] ~ T[complier == 1], clusters= my_blocking[complier==1])
  placebo_block = model_placebo$coefficients[2]
  placebo_block_se = model_placebo$std.error[2]
  #------------------------------------------------------------------------------------------------
  #SRS BASED ESTIMATORS:
  #------------------------------------------------------------------------------------------------
  T.srs = as.numeric(1:N %in% sample.int(N, size = N/2))
  Y.srs = ifelse(T.srs == 1 & complier == 1, Y1, Y0)

  #Exclusion restriction violation:
  Y.srs = Y.srs + (T.srs==1)*beta

  T.srs_received = ifelse(T.srs == 1 & complier == 1, 1, 0)

  #---IV:
  model_iv = iv_robust(Y.srs ~ T.srs_received | T.srs,
                       try_cholesky = TRUE, se_type='HC1')
  iv = coef(model_iv)[2]
  iv_se = model_iv$std.error[2]


  #---PSW
  df_X = X_block[,which(names(X_block) %in% blocking_variables)]
  #Need to estimate the weights:
  df_temp = data.frame(complier, df_X)
  weights_model = glm(complier~., data = df_temp[T.srs==1,], family="binomial")
  weights = predict(weights_model, df_temp, 'response')

  weights_c0 = weights[T.srs == 0]/sum(weights[T.srs == 0])
  mu_c0 = sum(Y.srs[T.srs==0]*weights_c0)
  mu_c1 = mean(Y.srs[T.srs == 1 & complier == 1])
  psw = mu_c1 - mu_c0

  #Variance estimate:
  varY1 = var(Y.srs[T.srs == 1 & complier == 1])/sum(complier*T.srs)
  varY0 = var(Y.srs[T==0])*sum(weights_c0^2)/(sum(weights_c0)^2)
  psw_se = sqrt(varY1+varY0)

  #Placebo SRS
  model_placebo_srs = lm_robust(Y.srs[complier == 1] ~ T.srs[complier == 1])
  placebo_srs = model_placebo_srs$coefficients[2]
  placebo_srs_se = model_placebo_srs$std.error[2]

  #--As treated
  AT = mean(Y.srs[T.srs_received == 1]) - mean(Y.srs[T.srs_received == 0])
  #--Per Protocol
  per_protocol = mean(Y.srs[T.srs_received == 1]) - mean(Y.srs[T.srs_received == 0])


  return(tibble(n_sample = N, block_var = paste0(blocking_variables, collapse = ", "),
                ATE, CACE, AT, per_protocol,
                block_dim, block_dim_se,
                iv_block, iv_block_se,
                psw_block, psw_block_se,
                iv, iv_se,
                psw, psw_se,
                placebo_block, placebo_block_se,
                placebo_srs, placebo_srs_se))
}

#------------------------------------------------------------------------------------------------
summarize_performance<-function(results){
  mse<-results %>% summarize(
    iv = mean((iv - CACE)^2),
    iv_block = mean((iv_block - CACE)^2),
    psw = mean((psw - CACE)^2),
    block_dim = mean((block_dim - CACE)^2),
    block_psw = mean((psw_block - CACE)^2),
    att = mean((AT - CACE)^2),
    placebo_block = mean((placebo_block - CACE)^2),
    placebo_srs = mean((placebo_srs - CACE)^2)
  )
  bias<-results %>%
    summarize(
      iv = mean(abs(iv - CACE)),
      iv_block = mean(abs(iv_block - CACE)),
      psw = mean(abs(psw - CACE)),
      block_dim = mean(abs(block_dim - CACE)),
      block_psw = mean(abs(psw_block - CACE)),
      att = mean(abs(AT - CACE)),
      placebo_block = mean(abs(placebo_block - CACE)),
      placebo_srs = mean(abs(placebo_srs - CACE))
    )
  se<-results %>%
    summarize(
      iv = sd(iv),
      iv_block = sd(iv_block),
      psw = sd(psw),
      block_dim = sd(block_dim),
      block_psw = sd(psw_block),
      att = sd(AT),
      placebo_block = sd(placebo_block),
      placebo_srs = sd(placebo_srs)
    )
  return(list(MSE=mse, Bias=bias, SE=se))
}

calculate_coverage<-function(results){
  results %>% summarize(
    iv = mean(CACE > (iv- 1.96 *iv_se) & CACE < (iv + 1.96 * iv_se)),
    iv_block = mean(CACE > (iv_block - 1.96 *iv_block_se) & CACE < (iv_block + 1.96 * iv_block_se)),
    psw = mean(CACE > (psw - 1.96 *psw_se) & CACE < (psw + 1.96 * psw_se)),
    block_dim = mean(CACE > (block_dim - 1.96 * block_dim_se) & CACE < (block_dim + 1.96 * block_dim_se)),
    block_psw = mean(CACE > (psw_block - 1.96 * psw_block_se) & CACE < (psw_block + 1.96 * psw_block_se))
  )
}

interval_lengths<-function(results){
  results %>% summarize(
    iv = mean(2 * 1.96 * iv_se, na.rm=TRUE),
    iv_block = mean(2 * 1.96 * iv_block_se, na.rm=TRUE),
    psw = mean(2 * 1.96 * psw_se, na.rm=TRUE),
    block_dim = mean(2 * 1.96 * block_dim_se, na.rm=TRUE),
    block_psw = mean(2 * 1.96 * psw_block_se, na.rm=TRUE)
  )
}
